Data and Set Up
Show code
library (tidyverse)
library (dplyr)
library (kableExtra)
library (DT)
# Load the "A" baby-names file; the path is resolved relative to the project
# root by here::here().
a_names_data <-
  here::here("Labs", "Lab 9", "BabyNames", "StateNames_A.csv") |>
  read_csv()
Data
Show code
# Interactive preview of the raw data with a per-column filter row on top.
# Reference: https://rstudio.github.io/DT/
datatable(
  a_names_data,
  filter = "top",
  class = "display",
  style = "auto",
  escape = TRUE,
  editable = FALSE,
  selection = "multiple",
  extensions = list(),
  callback = JS("return table;")
)
Plot
Removed the major grid lines, chose custom colors and fonts, moved the plot title, and added a subtitle.
Show code
# Yearly number of female babies named Allison, summed over every matching
# row for that year.
# Filtering the long-format rows directly makes the earlier pivot_wider() /
# replace_na() / distinct() steps unnecessary; na.rm = TRUE keeps the old
# "missing counts are treated as 0" behaviour. `.groups = "drop"` returns a
# plain ungrouped tibble rather than a rowwise one.
# The summary column is still called `F` so downstream code keeps working.
allison_f <- a_names_data |>
  filter(Gender == "F", Name == "Allison") |>
  group_by(Year) |>
  summarize(F = sum(Count, na.rm = TRUE), .groups = "drop")

# Line chart of the yearly totals with a serif/italic title block, light gray
# axis lines, and no major grid lines.
ggplot(data = allison_f, aes(x = Year, y = F)) +
  geom_line(color = "darkgreen") +
  theme(
    plot.title.position = "plot",
    plot.title = element_text(
      vjust = 1,
      face = "italic",
      family = "serif"
    ),
    plot.subtitle = element_text(
      family = "serif",
      face = "italic"
    ),
    axis.line = element_line(
      color = "lightgray",
      linewidth = 1,
      linetype = "solid"
    ),
    panel.grid.major = element_blank()
  ) +
  labs(
    title = "Number of babies named Allison over Time",
    subtitle = "in the US",
    # Axis titles are intentionally blanked; the plot title carries the meaning.
    x = " ",
    y = " "
  ) +
  scale_x_continuous(breaks = seq(1997, 2015, 2)) +
  scale_y_continuous(breaks = seq(5000, 8000, 500))
Allison Table
Show code
# Per-state totals of babies named Allison, split by recorded sex.
# Rows are filtered to the one name first, so the whole data set no longer has
# to be pivoted wide. Summing `Count` under a Gender mask yields 0 for a state
# with no rows of that sex, which matches the previous replace_na(., 0) step.
x <- a_names_data |>
  filter(Name == "Allison") |>
  group_by(State) |>
  summarize(
    F = sum(Count[Gender == "F"], na.rm = TRUE),
    M = sum(Count[Gender == "M"], na.rm = TRUE),
    .groups = "drop"
  )

# The table has three columns, so `align` carries exactly three letters
# (the original "lccr" listed a fourth that had no column to apply to).
knitr::kable(
  x,
  format = "pipe",
  col.names = c(
    "State",
    "Female born babies named Allison",
    "Male born babies named Allison"
  ),
  align = "lcc"
)
AK
232
0
AL
1535
0
AR
1198
0
AZ
1880
0
CA
12413
0
CO
1594
0
CT
1099
0
DC
321
0
DE
294
0
FL
4455
0
GA
3257
0
HI
183
0
IA
1477
0
ID
451
0
IL
5110
0
IN
3067
0
KS
1283
0
KY
1905
20
LA
1209
0
MA
2218
0
MD
2229
0
ME
340
0
MI
4014
0
MN
2374
0
MO
2882
0
MS
817
0
MT
226
0
NC
3435
0
ND
285
0
NE
807
0
NH
412
0
NJ
3052
0
NM
399
0
NV
729
0
NY
5747
0
OH
5487
0
OK
1421
0
OR
1186
0
PA
4307
0
RI
306
0
SC
1228
0
SD
376
0
TN
2488
0
TX
10192
0
UT
1125
0
VA
3220
0
VT
135
0
WA
1956
0
WI
2367
0
WV
813
0
WY
142
0
Filter to female babies only and create a new data set for graphing.
Show code
# Yearly number of female babies named Allison (the data behind the plot).
# Filtering the long-format rows directly replaces the earlier pivot_wider() /
# replace_na() / distinct() steps; na.rm = TRUE keeps missing counts from
# turning a yearly total into NA, and `.groups = "drop"` returns an ungrouped
# tibble instead of a rowwise one.
allison_f <- a_names_data |>
  filter(Gender == "F", Name == "Allison") |>
  group_by(Year) |>
  summarize(F = sum(Count, na.rm = TRUE), .groups = "drop")

# Two-column pipe table: year on the left, centered count.
knitr::kable(
  allison_f,
  format = "pipe",
  col.names = c("Year", "Number of Babies named Allison"),
  align = "lc"
)
1997
7274
1998
7861
1999
7023
2000
6314
2001
6209
2002
6237
2003
5850
2004
5871
2005
5631
2006
5560
2007
5450
2008
6237
2009
6579
2010
5856
2011
5453
2012
5411
2013
5422
2014
5440
Spelling by State
Show code
# Keep every row whose name is one of the three spellings.
# `%in%` tests set membership for each row. The previous `Name == c(...)`
# recycled the three-element vector down the column, comparing row 1 to
# "Allan", row 2 to "Alan", row 3 to "Allen", and so on, which silently
# discarded most of the matching rows.
al <- a_names_data |>
  filter(Name %in% c("Allan", "Alan", "Allen"))

# Total count per year and spelling; na.rm guards against missing counts and
# `.groups = "drop"` leaves an ordinary ungrouped tibble.
al_year <- al |>
  group_by(Year, Name) |>
  summarize(Count = sum(Count, na.rm = TRUE), .groups = "drop")

# Striped HTML table rendered in Arial.
kable(
  al_year,
  format = "html",
  col.names = c("Year", "Name", "Number of babies named as such"),
  align = "lcl"
) |>
  kable_styling("striped") |>
  kable_classic(html_font = "Arial")
Year
Name
Number of babies named as such
1997
Alan
311
1997
Allan
251
1997
Allen
215
1998
Alan
384
1998
Allan
85
1998
Allen
603
1999
Alan
311
1999
Allan
69
1999
Allen
239
2000
Alan
1054
2000
Allan
84
2000
Allen
193
2001
Alan
752
2001
Allan
25
2001
Allen
477
2002
Alan
1127
2002
Allan
180
2002
Allen
217
2003
Alan
818
2003
Allan
75
2003
Allen
547
2004
Alan
323
2004
Allan
281
2004
Allen
379
2005
Alan
564
2005
Allan
37
2005
Allen
225
2006
Alan
538
2006
Allan
244
2006
Allen
356
2007
Alan
1172
2007
Allan
108
2007
Allen
502
2008
Alan
681
2008
Allan
294
2008
Allen
417
2009
Alan
1334
2009
Allan
79
2009
Allen
354
2010
Alan
1483
2010
Allan
91
2010
Allen
424
2011
Alan
1088
2011
Allan
187
2011
Allen
521
2012
Alan
431
2012
Allan
80
2012
Allen
244
2013
Alan
509
2013
Allan
105
2013
Allen
159
2014
Alan
476
2014
Allan
13
2014
Allen
433
Show code
# Male babies named Allan / Alan / Allen in California and Pennsylvania in 2000.
# The rows are filtered before aggregating, so the data set is no longer
# pivoted into one column per distinct name. Summing `Count` under a Name mask
# gives 0 when a spelling does not occur in a state, as the old
# sum(., na.rm = TRUE) over the pivoted columns did.
# Only male counts are included, as before (the original pivoted the `M`
# column only).
al_state <- a_names_data |>
  filter(
    Gender == "M",
    Year == 2000,
    State %in% c("PA", "CA")
  ) |>
  group_by(State) |>
  summarise(
    Allan = sum(Count[Name == "Allan"], na.rm = TRUE),
    Alan = sum(Count[Name == "Alan"], na.rm = TRUE),
    Allen = sum(Count[Name == "Allen"], na.rm = TRUE),
    .groups = "drop"
  )

# Four columns, so four alignment letters; the original passed five and left a
# trailing empty argument in the call.
knitr::kable(
  al_state,
  format = "pipe",
  col.names = c(
    "State",
    "Babies named Allan",
    "Babies named Alan",
    "Babies named Allen"
  ),
  align = "llll"
)
CA
131
579
176
PA
12
51
56
Show code
# Convert each spelling's count into its share of the per-state total.
# `Tot` is listed last in across() so the three name columns are divided by the
# numeric total before `Tot` itself is replaced by Tot / Tot.
# Every converted column ends up as a character string with three decimals.
al_state_prop <- mutate(
  al_state,
  Tot = Allan + Alan + Allen,
  across(
    .cols = c(Allan, Alan, Allen, Tot),
    .fns = ~ format(round(.x / Tot, 3), nsmall = 3)
  )
)

knitr::kable(
  al_state_prop,
  format = "pipe",
  col.names = c(
    "State",
    "Proportion of Babies born named Allan",
    "Proportion of Babies born, named Alan",
    "Proportion of Babies born, named Allen",
    "Total"
  )
)
CA
0.148
0.653
0.199
1.000
PA
0.101
0.429
0.471
1.000